library(tidyverse)
library(tigris)
library(censusapi)
library(sf)
library(mapview)
library(plotly)
library(lehdr)
library(leaflet)
options(
tigris_class = "sf",
tigris_use_cache = T
)
Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")
Using the LODES dataset and the list of essential workers to map the distribution of essential workers by block group in San Jose
First get the block groups in San Jose
scc_blockgroups <- block_groups("CA","Santa Clara", cb=F, progress_bar=F)
# Use tracts sent to us by San Jose
sj_tracts <- st_read("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp") %>%
st_as_sf() %>%
st_transform(st_crs(scc_blockgroups))
## Reading layer `CSJ_Census_Tracts' from data source `/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp' using driver `ESRI Shapefile'
## Simple feature collection with 219 features and 9 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## epsg (SRID): 2227
## proj4string: +proj=lcc +lat_1=38.43333333333333 +lat_2=37.06666666666667 +lat_0=36.5 +lon_0=-120.5 +x_0=2000000.0001016 +y_0=500000.0001016001 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
sj_citycouncil_disticts <- st_read("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp") %>%
st_as_sf() %>%
st_transform(st_crs(scc_blockgroups))
## Reading layer `CITY_COUNCIL_DISTRICTS' from data source `/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp' using driver `ESRI Shapefile'
## Simple feature collection with 10 features and 7 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## epsg (SRID): 2227
## proj4string: +proj=lcc +lat_1=38.43333333333333 +lat_2=37.06666666666667 +lat_0=36.5 +lon_0=-120.5 +x_0=2000000.0001016 +y_0=500000.0001016001 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
# from code written by others to get SJ blockgroups
sj_blockgroups <-
scc_blockgroups %>%
st_centroid() %>%
st_join(sj_tracts, left = F) %>%
st_join(sj_citycouncil_disticts%>% dplyr::select(DISTRICTS)) %>%
mutate(
DISTRICTS = DISTRICTS %>% factor(levels = c("1","2","3","4","5","6","7","8","9","10"))
) %>%
st_set_geometry(NULL) %>%
left_join(scc_blockgroups%>% dplyr::select(GEOID), by = "GEOID") %>%
st_as_sf() %>%
dplyr::select(GEOID, DISTRICTS)
# the spatial join leaves off two blockgroups which are touching district 9. The following code assigns those to district 9
sj_blockgroups$DISTRICTS[is.na(sj_blockgroups$DISTRICTS)] <- 9
sj_boundary <-
places("CA", cb=F, progress_bar=F) %>%
filter(NAME == "San Jose")
Next obtain the distribution of workers from the LODES dataset
# define the LODES categories, from https://lehd.ces.census.gov/data/lodes/LODES7/LODESTechDoc7.3.pdf
LODES_variable <- c('CNS01', 'CNS02', 'CNS03', 'CNS04', 'CNS05', 'CNS05', 'CNS05', 'CNS06', 'CNS07', 'CNS07', 'CNS08', 'CNS08', 'CNS09', 'CNS10', 'CNS11', 'CNS12', 'CNS13', 'CNS14', 'CNS15', 'CNS16', 'CNS17', 'CNS18', 'CNS19', 'CNS20')
LODES_NAICS <- c('11', '21', '22', '23', '31', '32', '33', '42', '44', '45', '48', '49', '51', '52', '53', '54', '55', '56', '61', '62', '71', '72', '81', '92')
LODES_keys <- data.frame(LODES_variable, LODES_NAICS)
# get the LODES data
sj_rac <-
grab_lodes(
state = "ca",
year = 2017,
lodes_type = "rac",
job_type = "JT01",
segment = "S000",
state_part = "main",
agg_geo = "bg"
) %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
filter(!is.na(DISTRICTS)) %>%
dplyr::select(-c(contains("CE"),contains("CA"),contains("CR"), contains("CT"),contains("CS"),contains("CD")))
Now we look at the breakdown of essential workers.
Method 1: Use the QWI data to find the distribution of workers in NAICS 4-digit codes in Santa Clara County, and use this to weight the contributions to each 2-digit code. Note that this method disregards information about 6 digit codes within the 4 digit code that differ from the broader 4 digit code; this could be adjusted, but without data on the distribution of 6 digit within 4 digit I thought it was best to leave out.
# get distribution of jobs in Santa Clara County
label_industry <-
read_csv("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/NAICS/label_industry.csv")
qwi_scc <- NULL
for(years in 2017){
qwi<-
getCensus(
name = "timeseries/qwi/sa",
region = "county:085",
regionin = "state:06",
vars = c("EmpS","EarnS","industry","ind_level"),
time = years
) %>%
filter(ind_level == 4) %>%
mutate(
year = substr(time,1,4)
) %>%
left_join(label_industry, by= "industry") %>%
group_by(year,industry,label) %>%
summarize(
EmpS = round(mean(as.numeric(EmpS), na.rm = TRUE),0),
EarnS = round(mean(as.numeric(EarnS), na.rm = TRUE),0)
) %>%
filter(!is.na(EmpS) & EmpS != 0)
qwi_scc<-
rbind(qwi_scc,qwi)
}
# here I will use the Delaware information on essential businesses for the moment
DE_essential <- read_csv('/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/Essential Designations/delaware_essential_designations.csv')
# grouping used in previous version of this method, also used for method 2 below
DE_essential_grouped <- DE_essential %>% mutate(NAICS_2_dig = substr(NAICS, 1, 2)) %>%
full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>%
group_by(LODES_variable, Essential) %>%
summarize(Count = n()) %>% # get number of subcodes within each 2 digit code that are essential and nonessential
ungroup() %>%
mutate(Essential = replace_na(Essential, FALSE)) %>% # assume if not listed it is not essential
spread(Essential, Count, fill = 0) %>%
rename(c("Essential" = "TRUE", "Nonessential" = "FALSE")) %>%
mutate(totalCount = Essential + Nonessential, fracEssential = Essential / totalCount)
# group the Delaware codes and weight the fraction of each 2 digit group designated as essential by the proportion of workers that group that are from "essential" 4 digit groups based on data for Santa Clara Couty. Note that this ignores the 6 digit codes within the 4 digit codes (they will have NAs in their number of workers, and so get filled with zeros)
DE_essential_weighted_grouped <- DE_essential %>%
mutate(NAICS = str_sub(NAICS, 1,)) %>%
full_join(qwi_scc, by = c('NAICS' = 'industry')) %>%
mutate(NAICS_2_dig = substr(NAICS, 1, 2)) %>%
full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>%
mutate(EmpS = replace_na(EmpS, 0), Essential = replace_na(Essential, FALSE)) %>% # assume if not listed it is not essential, and if no employees listed that the number is zero
group_by(LODES_variable, Essential) %>%
summarize(totalWorkers = sum(EmpS)) %>% # get number of subcodes within each 2 digit code that are essential and nonessential and total workers essential/nonessential
ungroup() %>%
spread(Essential, totalWorkers, fill = 0) %>%
rename(c("Essential" = "TRUE", "Nonessential" = "FALSE")) %>%
mutate(totalCount = Essential + Nonessential, fracEssential = Essential / totalCount)
# note that the Delaware list didn't include 92, public administration, as essential - but I'm going to assume that's essential and manually enter that here
DE_essential_weighted_grouped$fracEssential[20] = 1.0
# get essential breakdown in San Jose
sj_rac_essential <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
left_join(DE_essential_weighted_grouped %>% dplyr::select(fracEssential, LODES_variable), by = c("Category" = "LODES_variable")) %>%
mutate(numEssential = fracEssential * Number) %>%
group_by(h_bg, C000) %>%
summarize(totalEssential = sum(numEssential)) %>%
ungroup() %>%
mutate(fracEssential = round((totalEssential / C000)*100, 1))
Map of method 1
# set up palette
blue_pal <- colorNumeric(
palette = "Blues",
domain =
c(sj_rac_essential %>%
pull(fracEssential) %>%
unique())
)
map <- leaflet(data = sj_rac_essential) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data =
sj_rac_essential %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
st_as_sf() %>% st_set_crs(4326),
fillColor = ~blue_pal(fracEssential),
color = "white",
weight = 0.25,
fillOpacity = 0.5,
label = ~paste0(fracEssential,"% of workers that are essential"),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
)
map
Method 2: Only count as essential the 2 digit NAICS codes with >=80% of the 4 digit sub-codes marked as essential
# grouping used in previous version of method 1, also used for method 2
DE_essential_grouped <- DE_essential %>% mutate(NAICS_2_dig = substr(NAICS, 1, 2)) %>%
full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>%
group_by(LODES_variable, Essential) %>%
summarize(Count = n()) %>% # get number of subcodes within each 2 digit code that are essential and nonessential
ungroup() %>%
mutate(Essential = replace_na(Essential, FALSE)) %>% # assume if not listed it is not essential
spread(Essential, Count, fill = 0) %>%
rename(c("Essential" = "TRUE", "Nonessential" = "FALSE")) %>%
mutate(totalCount = Essential + Nonessential, fracEssential = Essential / totalCount)
# again make 92 be counted as essential manually
DE_essential_grouped$fracEssential[20] = 1.0
DE_binary_essential <- DE_essential_grouped %>% mutate(binaryEssential = fracEssential >= 0.8)
sj_rac_essential_2 <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
left_join(DE_binary_essential %>% dplyr::select(binaryEssential, LODES_variable), by = c("Category" = "LODES_variable")) %>%
filter(binaryEssential == TRUE) %>%
group_by(h_bg, C000) %>%
summarize(totalEssential = sum(Number)) %>%
ungroup() %>%
mutate(fracEssential = round((totalEssential / C000)*100, 1))
Map of method 2
# set up palette
blue_pal <- colorNumeric(
palette = "Blues",
domain =
c(sj_rac_essential_2 %>%
pull(fracEssential) %>%
unique())
)
map <- leaflet(data = sj_rac_essential_2) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data =
sj_rac_essential_2 %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
st_as_sf() %>% st_set_crs(4326),
fillColor = ~blue_pal(fracEssential),
color = "white",
weight = 0.25,
fillOpacity = 0.5,
label = ~paste0(fracEssential,"% of workers that are essential"),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
)
map